library(tidyverse)
library(ggplot2)
library(DT)
library(haven)
library(ggpubr)
library(cowplot)
library(ggthemes)
library(readr)
library(ggExtra)
library(scales)
library(Rmisc)
library(ggbeeswarm) # to make jitter plots
library(RColorBrewer) # for the colors
library(patchwork)# load raw data
df_w1 <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1.rds")
df_w1_supp <- readRDS("~/Downloads/2023-095b_files/2023-095a_client_wave-1_supp.rds")
df_w2 <- readRDS("~/Downloads/2023-095b_files/2023-095b_client_wave-2.rds")
# bind wave 1 data + filter only Progress == 100
df_w1 = rbind(df_w1, df_w1_supp)
df_w1 |>
select(-user_id) |>
filter(Progress==100) -> df_w1
# invited users list (for merge)
Users_to_invite_Wave2 <- read_csv("Users_to_invite_Wave2.csv",
col_types = cols(user_id = col_character()))
Users_to_invite_Wave2 |>
select(user_id) |>
mutate(userid = user_id) -> for_merge
# merged with invited user list
df_w2 |>
merge(for_merge, by='userid') -> w2_users
# merged with invited user list
w2_users |>
select(userid, user_id) |>
merge(df_w1, by='userid') -> w1_users
# drop randomized_group == 'drop' from wave 2
w2_users |>
filter(randomized_group != 'drop') |>
filter(randomized_group != 'muting_treatment2_drop') |>
select(userid, randomized_group) -> wave2_data
names(wave2_data) = c('user_id', 'randomized_group')
# 134 users without drop
# muted_accounts
muted_accounts <- read_csv("muted_accounts.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
# exposure data
pre_hometimeline <- read_csv("pre_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character())) |> select(!created_at)
pre_hometimeline |>
mutate(tag = "pre_hometimeline") -> pre_hometimeline
post_hometimeline <- read_csv("post_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character())) |> select(!created_at)
post_hometimeline |>
mutate(tag = "post_hometimeline") -> post_hometimeline
# engagement data
pre_engagement <- read_csv("pre_engagement.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
pre_engagement |>
mutate(tag = "pre_engagement") -> pre_engagement
post_engagement <- read_csv("post_engagement.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
post_engagement |>
mutate(tag = "post_engagement") -> post_engagement
# likes data
pre_likes <- read_csv("pre_likes.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
names(pre_likes)[3] = "type"
pre_likes |>
mutate(tag = "pre_likes") -> pre_likes
post_likes <- read_csv("post_likes.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
names(post_likes)[3] = "type"
post_likes |>
mutate(tag = "post_likes") -> post_likes
# truncated list
updated_inventory <- read_csv("updated_inventory.csv",
col_types = cols_only(target_user_id = col_character()))
reduced_inventory = updated_inventory[c(1:489),]wave2_data |> merge(pre_engagement, by='user_id') -> this_1
wave2_data |> merge(post_engagement, by='user_id') -> this_2
wave2_data |> merge(pre_hometimeline, by='user_id') -> this_3
wave2_data |> merge(post_hometimeline, by='user_id') -> this_4
wave2_data |> merge(pre_likes, by='user_id') -> this_5
wave2_data |> merge(post_likes, by='user_id') -> this_6
rbind(this_1, this_2, this_3, this_4, this_5, this_6) -> testreduced_inventory |> mutate(reduced = "yes") -> reduced_inventory
muted_accounts |> mutate(muted = "yes") -> muted_accounts
test |>
merge(muted_accounts, by = c('user_id', 'target_user_id'), all.x = TRUE) |>
mutate(
muted = ifelse(is.na(muted), "no", muted)
) |>
merge(reduced_inventory, by='target_user_id', all.x = TRUE) |>
mutate(
reduced = ifelse(is.na(reduced), "no", reduced)
) |>
relocate(c(user_id, randomized_group)) -> this_is_full_data# count_engagement.csv
count_engagement <- read_csv("count_engagement.csv",
col_types = cols(user_id = col_character()))
this_is_full_data |>
merge(count_engagement, by=c('user_id','tag') ,all.x=TRUE) -> this_is_final_data
this_is_final_data |>
filter(user_id!="1165632418449186822" & user_id !='1590859787029213186' & user_id != '975899686774165504' & user_id != '375334044' & user_id != '1354968197078900743') -> this_is_final_dataData: this_is_final_data
N=128 (control: 46, media_literacy: 40, muting_treatment1: 41, muting_treatment2: 1)
Let’s remove those accounts that blocked the study account or are locked/suspended (we could not collect post-treatment data).
✅ N=123 (control: 45, media_literacy: 38, muting_treatment1: 40)
(user level)
user_id: Participants’ Twitter IDs
randomized_group: Randomized group -
media_literacy, muting_treatment1/2,
control
tag:
pre_hometimeline/engagement: home timeline (reverse
chronological) or engagement data collected before treatment (wave
2)
post_hometimeline/engagement: home timeline or
engagement data collected after treatment (wave 2) ~ till Mar 14
count: Number of tweets collected
(user-LQ account dyad level)
target_user_id: LQ accounts’ Twitter IDs (found in
home timeline or engagement data)
type: Type of tweets related to LQ sources
direct: Direct tweets mentioning LQ sources; Direct
tweets produced by LQ sources for likes
indirect: (only for likes) likes of tweets that
mention LQ sources (@) or their URLs
replied: Reply tweets to LQ sources
retweeted: Any retweets of LQ tweets
quoted: Any quote tweets of LQ tweets
muted : Whether the LQ source is muted (“yes”) or
not (“no”)
reduced : Whether the LQ source is included in the
reduced list (“yes”) or not (“no”)
For the mute group, post-treatment engagements/exposure to “muted” accounts vs. For the control group, post-treatment engagements/exposure to hypothetically “muted” accounts (accounts that would have been muted if they were assigned to the muting group)
Muting group: Post-treatment engagements
with/exposure to/likes of “muted” sourcesControl group (control + media_literacy):
Post-treatment engagements with/exposure to/likes of “hypothetically
muted” sources(This removed those users who have engagements/exposures exclusively to the LQ accounts that are “not” muted either in real or hypothetically, resulting in 92 unique users for the task 1.)
FYI) For muted accounts that you follow:
Replies and mentions by the muted account will still appear in your Notifications tab.
posts from a muted account – posted before the account was muted – will be removed from your Home timeline.
When you click or tap into a conversation, replies from muted accounts will be visible.
this_is_final_data |>
mutate(
group = ifelse(randomized_group=="media_literacy" |
randomized_group=='control', "Control", "Muting")
) |>
mutate(flag = ifelse(
group == "Control" & muted == "yes", "include",
# For Control group, only include cases where muted == "yes"
ifelse(
group == "Muting" & muted == "yes", "include",
# For Muting group, only include cases where muted == "yes"
"drop"
# The rest of the cases (muted == "no"), drop
)
)) |>
filter(flag == "include") |> select(!flag) -> test_again
test_again |>
select(user_id, tag, randomized_group, count) |>
group_by(user_id, tag) |>
count() |> ungroup() -> test_for_muted
# test_for_muted - if there are no categories (tag), freq == 0
test_for_muted |>
select(user_id, tag, freq) |>
pivot_wider(names_from=tag, values_from=freq) |>
mutate_if(is.numeric , replace_na, replace = 0) |>
relocate(user_id,
pre_hometimeline, post_hometimeline,
pre_engagement, post_engagement,
pre_likes, post_likes) -> test_for_muted_wider
test_for_muted_wider |>
select(user_id, pre_hometimeline:post_likes) |>
pivot_longer(cols=pre_hometimeline:post_likes,
names_to = "tag",
values_to = "n") -> test_for_muted_longer
test_again |>
select(user_id, tag, count) |>
unique() |>
merge(test_for_muted_longer, by=c("user_id", "tag"), all=TRUE) |>
mutate_if(is.numeric , replace_na, replace = 0) -> test_for_muted_longer2
test_again |> select(user_id, tag, randomized_group) |> unique() -> for_merge_muted_1
for_merge_muted_1 |>
select(user_id, randomized_group) |>
unique() -> for_merge_muted_2
test_for_muted_longer2 |>
merge(for_merge_muted_1, by = c('user_id', 'tag'), all.x = T) |>
select(user_id, tag, count, n) |>
merge(for_merge_muted_2, by = "user_id") |>
mutate(group = ifelse(randomized_group == "control" |
randomized_group == "media_literacy",
"Control", "Muting")) -> test_for_muted_longer3
# backup
df_plot_muted = test_for_muted_longer3Post-treatment engagement (tweets, re-tweets, quotes, replies) with
(hypothetically) muted accounts between control
vs. muting
df_plot_muted |>
mutate(rate = (n/count)*100) |>
mutate_if(is.numeric , replace_na, replace = 0) |>
mutate(rate = round(rate,1)) |>
filter(tag == 'post_engagement') |> # tag == 'post_engagement' filter
select(user_id, group, n, rate) -> df_post_engagement
df_post_engagement |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_post_engagement |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_post_engagement |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
scale_y_continuous(n.breaks=15) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> bar_post_engagement
df_post_engagement |>
ggplot(aes(x = group, y = n)) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.8, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.8, size = 3,
aes(fill = group)) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
scale_y_sqrt(n.breaks=15) +
labs(x = "Group",
y = "Count (sqrt scale)",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> sqrt_post_engagement
plot_post_engage <- bar_post_engagement +
plot_annotation('Plot 1. Post-Treatment Engagements',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_engageThe y-axis (Count) indicates the number of tweets
related to LQ sources that one engaged with (=engagement). These include
direct mentions of LQ accounts, replies, retweets, and quote tweets of
tweets created by LQ accounts, as well as direct URLs to these LQ
domains.
Group: Control includes control group and media literacy
group, while Muting includes muting treatment groups. For
Muting group, only engagements with those LQ accounts that
are muted are counted. Note that in pilot study, we muted 70%
of the truncated list (truncated list: n=489).
Hence, Plot 1 allows us to compare the post-treatment engagements
with (hypothetically) muted LQ accounts between Control and
Muting groups.
For most participants, we see low levels of engagements with LQ accounts with a few outliers (active ones). Given this highly skewed distribution, and to be careful with comparison, I overlayed the same quantities but with different ways of visualizing them.
Bar plot
Comparison: mean
Error bars indicate SEs of the mean
t-test statistics (p-value presented)
Dot plot
Comparison: distribution
K-S test (nonparametric)
Post-treatment exposure between control
vs. muting (for muted accounts)
df_plot_muted |>
filter(tag == "post_hometimeline") |> # tag == 'post_hometimeline' filter
select(user_id, group, n) -> df_post_hometimeline
df_post_hometimeline |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_post_hometimeline |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_post_hometimeline |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_post_hometimeline
plot_post_exposure <- bar_post_hometimeline +
plot_annotation('Plot 2-1. Post-Treatment Exposure',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_exposureThe y-axis (Count) indicates the number of tweets from
LQ sources (that are muted) found in one’s home timeline
(=exposure).
Hence, Plot 2 allows us to compare the post-treatment exposures to
(hypothetically) muted LQ accounts between Control and
Muting groups.
Since we pull reverse chronological home timeline (exposure data) once, how about calculating activity rates, instead of using count?
# exposure data
post_hometimeline_final <- read_csv("post_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
aggregate(post_hometimeline_final$created_at,
by=list(post_hometimeline_final$user_id), max) -> max_agg
aggregate(post_hometimeline_final$created_at,
by=list(post_hometimeline_final$user_id), min) -> min_agg
names(max_agg) = c("user_id", "max_time")
names(min_agg) = c("user_id", "min_time")
max_agg |> merge(min_agg, by='user_id') |>
mutate( timeinterval= as.duration(min_time %--% max_time),
in_days = round(as.numeric(max_time - min_time)/86400, 5),
in_hours =round(as.numeric(max_time - min_time)/3600, 5)) -> merge_agg
# merge timegap variables into df_post_hometimeline
df_post_hometimeline |>
merge(merge_agg, by='user_id', all.x=TRUE) |>
mutate_at(c(6:8), ~replace(., is.na(.), 0)) -> post_home_merged
post_home_merged |>
mutate(activity_rate = n/in_days) |>
mutate_at(c(9), ~replace(., is.nan(.), 0)) |>
mutate_at(c(9), ~replace(., is.infinite(.), 0)) -> post_home_mergedPlot 2-2. Post-Treatment Exposure (Daily activity rate)
# post_home_merged
post_home_merged |>
filter(group == "Control") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group1
post_home_merged |>
filter(group == "Muting") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group2
post_home_merged |>
ggplot(aes(x = group, y = activity_rate)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "tweets / day",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_post_hometimeline_rate
plot_post_exposure_rate <- bar_post_hometimeline_rate +
plot_annotation('Plot 2-2. Post-Treatment Exposure (Daily activity rate)',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_exposure_rateSetting the y-axis into log10 scale to better understand the distribution:
post_home_merged |>
ggplot(aes(x = group, y = log10(activity_rate+1))) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "log10((tweets/day)+1)",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> bar_post_hometimeline_rate_log
plot_post_exposure_rate_log <- bar_post_hometimeline_rate_log +
plot_annotation('Plot 2-3. Post-Treatment Exposure (Daily activity rate; log10(y+1))',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_exposure_rate_logdf_plot_muted |>
filter(tag == "post_likes") |> # tag == 'post_likes' filter
select(user_id, group, n) -> df_post_likes
df_post_likes |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_post_likes |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_post_likes |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_post_likes
plot_post_likes <- bar_post_likes +
plot_annotation('Plot 3. Post-Treatment Likes',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_post_likesThe y-axis (Count) indicates the number of likes to
tweets related to LQ sources that are muted (=likes).
Hence, Plot 3 allows us to compare the post-treatment likes to
(hypothetically) muted LQ accounts between Control and
Muting groups.
Pre-treatment engagement between control
vs. muting (for muted accounts)
# retrieve recruitment mode
w1_users |> select(user_id, vs_recruitment) -> recruitmode
# merge
df_plot_muted |>
merge(recruitmode, by='user_id', all.x = TRUE) -> df_plot_muted_recruit
df_plot_muted_recruit |>
filter(tag == "pre_engagement") |>
select(user_id, group, n, vs_recruitment) -> df_pre_engagement
df_pre_engagement |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_engagement |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_engagement |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_engagement
bar_pre_engagement + plot_annotation('Plot 4. Pre-Treatment Engagements',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))Pre-treatment exposure between control
vs. muting (for muted accounts)
# Use df_plot_unmuted
df_plot_muted_recruit |>
filter(tag == "pre_hometimeline") |> # tag == 'post_hometimeline' filter
select(user_id, group, n, vs_recruitment) -> df_pre_hometimeline
pre_hometimeline_final <- read_csv("pre_hometimeline_final.csv",
col_types = cols(user_id = col_character(),
target_user_id = col_character()))
aggregate(pre_hometimeline_final$created_at,
by=list(pre_hometimeline_final$user_id), max) -> max_agg
aggregate(pre_hometimeline_final$created_at,
by=list(pre_hometimeline_final$user_id), min) -> min_agg
names(max_agg) = c("user_id", "max_time")
names(min_agg) = c("user_id", "min_time")
max_agg |> merge(min_agg, by='user_id') |>
mutate( timeinterval= as.duration(min_time %--% max_time),
in_days = round(as.numeric(max_time - min_time)/86400, 5),
in_hours =round(as.numeric(max_time - min_time)/3600, 5)) -> merge_agg
# merge timegap variables into df_pre_hometimeline
df_pre_hometimeline |>
merge(merge_agg, by='user_id', all.x=TRUE) |>
mutate_at(c(7:9), ~replace(., is.na(.), 0)) -> pre_home_merged
pre_home_merged |>
mutate(activity_rate = n/in_days) |>
mutate_at(c(10), ~replace(., is.nan(.), 0)) |>
mutate_at(c(10), ~replace(., is.infinite(.), 0)) -> pre_home_merged
df_pre_hometimeline |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_hometimeline |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_hometimeline |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_exposure
bar_pre_exposure + plot_annotation('Plot 5. Pre-Treatment Exposure',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))# post_home_merged
pre_home_merged |>
filter(group == "Control") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group1
pre_home_merged |>
filter(group == "Muting") |>
select(activity_rate) |>
as.vector() |>
unlist() |>
as.integer() -> group2
pre_home_merged |>
ggplot(aes(x = group, y = activity_rate)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "tweets / day",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_hometimeline_rate
plot_pre_exposure_rate <- bar_pre_hometimeline_rate +
plot_annotation('Plot 5-2. Pre-Treatment Exposure (Daily activity rate)',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_pre_exposure_ratepre_home_merged |>
ggplot(aes(x = group, y = log10(activity_rate+1))) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "log10((tweets/day)+1)",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
)) -> bar_pre_hometimeline_rate_log
plot_pre_exposure_rate_log <- bar_pre_hometimeline_rate_log +
plot_annotation('Plot 5-3. Pre-Treatment Exposure (Daily activity rate; log10(y+1))',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))
plot_pre_exposure_rate_logPre-treatment exposure between control
vs. muting (for muted accounts)
df_plot_muted_recruit |>
filter(tag == "pre_likes") |>
select(user_id, group, n, vs_recruitment) -> df_pre_likes
df_pre_likes |>
filter(group == "Control") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_likes |>
filter(group == "Muting") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_likes |>
ggplot(aes(x = group, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = group)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6, aes(fill = group)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:2]) +
labs(x = "Group",
y = "Count",
caption = paste0("P = ",
signif(t.test(group1, group2)$p.value, 2),
" (t test)", "\n",
"P = ",
signif(ks.test(group1, group2)$p.value, 2),
" (Kolmogorov–Smirnov test)")) +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Control: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), "\n",
"Muting: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)
))-> bar_pre_likes
bar_pre_likes + plot_annotation('Plot 6. Pre-Treatment Likes',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))In general, no differences between control vs. muting groups in pre-treatment engagements, exposure, and likes!
Pre-treatment Engagement
# ENGAGEMENT
df_pre_engagement |>
filter(vs_recruitment == "Twitter recruitment") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_engagement |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_engagement |>
filter(vs_recruitment == "Existing panelist") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group3
df_pre_engagement |>
ggplot(aes(x = vs_recruitment, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_engagement_recruit
bar_pre_engagement_recruit +
plot_annotation('Plot 7. Pre-Treatment Engagement by Recruitment Mode',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))Pre-treatment Exposure
# EXPOSURE
df_pre_hometimeline |>
filter(vs_recruitment == "Twitter recruitment") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_hometimeline |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_hometimeline |>
filter(vs_recruitment == "Existing panelist") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group3
df_pre_hometimeline |>
ggplot(aes(x = vs_recruitment, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_exposure_recruit
bar_pre_exposure_recruit +
plot_annotation('Plot 8. Pre-Treatment Exposure by Recruitment Mode',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))pre-treatment likes
# LIKES
df_pre_likes |>
filter(vs_recruitment == "Twitter recruitment") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group1
df_pre_likes |>
filter(vs_recruitment == "New panelist recruited after January 1, 2024") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group2
df_pre_likes |>
filter(vs_recruitment == "Existing panelist") |>
select(n) |>
as.vector() |>
unlist() |>
as.integer() -> group3
df_pre_likes |>
ggplot(aes(x = vs_recruitment, y = n)) +
ggbeeswarm::geom_quasirandom(
shape = 21, color = "white",
alpha = 0.9, size = 3,
aes(fill = vs_recruitment)
) +
geom_bar(stat = "summary", fun = mean, width = 0.7, alpha = 0.6,
aes(fill = vs_recruitment)) +
stat_summary(geom = "errorbar", fun.data = "mean_se", width = 0.1, linewidth = 1) +
scale_fill_manual(values = brewer.pal(8, "Accent")[1:3]) +
labs(x = "Recruitment Mode",
y = "Count") +
theme_classic() +
theme(
text = element_text(size = 12, face = "bold", color = "black"),
axis.text = element_text(color = "black"),
legend.position = "none",
plot.title = element_text(size = 10),
plot.caption = element_text(hjust = 0)
) +
ggtitle(
paste0(
"Twitter recruitment: mean = ", signif(mean(group1), 2),
"; sd = ", signif(sd(group1), 2), "; median = ", signif(median(group1), 2),
"; IQR = ", signif(IQR(group1), 2), " (N=", length(group1), ")", "\n",
"New panelist: mean = ", signif(mean(group2), 2),
"; sd = ", signif(sd(group2), 2),
"; median = ", signif(median(group2), 2),
"; IQR = ", signif(IQR(group2), 2)," (N=", length(group2), ")", "\n",
"Existing panelist: mean = ", signif(mean(group3), 2),
"; sd = ", signif(sd(group3), 2),
"; median = ", signif(median(group3), 2),
"; IQR = ", signif(IQR(group3), 2)," (N=", length(group3), ")"
)) -> bar_pre_likes_recruit
bar_pre_likes_recruit + plot_annotation('Plot 9. Pre-Treatment Likes by Recruitment Mode',
theme=theme(plot.title=element_text(size = 12,
face = "bold",
color = "black",
hjust=0.5)))I collected post-treatment data on Mar 26, which is ~26 days after the start of Wave 2. Thus, the post-treatment data contains minimum 19 days ~ maximum 26 days post Wave 2, while pre-treatment data contains maximum 4 weeks.
Post-treatment data: Considering that muting took ~1 day or more, I filtered data with created_at >= Wave 2 session start time + 2 days, to make sure that I only include tweets from LQ sources after the muting job is done.